#Remember which objects already exist so the cleanup at the end of this script
#removes only what was created here
keep <- ls()

###############
#Load Raw Data#
###############

#Raw ICPSR 0001 elections data (aggregated to single file).
#State code, party and county id are read as character.
elections <- fread(
  "./raw/full_elections_long.csv",
  colClasses = c(
    ICPR_STATE_CODE = "character",
    party = "character",
    COUNTY_ID = "character"
  )
)

#State lookup table; STICPSR is formatted as a zero-padded, two-character
#code that serves as the join key below
state_fips_icpsr <- fread("./raw/state_fips_icpsr.csv")
state_fips_icpsr[, ICPR_STATE_CODE := sprintf("%02.f", STICPSR)]

#Merge in FIPS codes/state name: key both tables on the ICPSR state code and
#look up the state row for every elections row
setkey(state_fips_icpsr, ICPR_STATE_CODE)
setkey(elections, ICPR_STATE_CODE)
elections <- state_fips_icpsr[elections]

#Load county boundary-change crosswalk (for areal interpolation), keeping only
#rows where both overlap percentages are strictly positive
boundary_crosswalk <- fread("./raw/1860_county_crosswalk.csv")
boundary_crosswalk <- boundary_crosswalk[
  pct_from_in_target > 0 & pct_target_in_from > 0
]

#Convert the validity window of each source county to Date and upper-case the
#source state
boundary_crosswalk[, `:=`(
  from_start_date = as.Date(from_start_date),
  from_end_date = as.Date(from_end_date),
  from_state = toupper(from_state)
)]

#Load county size
county_size_1860 <- fread("./raw/1860_county_areas.csv")

#Key both tables on the full set of target-county identifier columns
setkey(
  county_size_1860,
  target_id, target_full_name, target_name, target_state, target_fips, target_type
)
setkey(
  boundary_crosswalk,
  target_id, target_full_name, target_name, target_state, target_fips, target_type
)

#Merge county size in 1860 to boundary crosswalk
boundary_crosswalk <- county_size_1860[boundary_crosswalk]

#Upper-case the target state and strip a trailing "<whitespace>TERRITORY"
boundary_crosswalk[
  ,
  target_state := str_replace(toupper(target_state), "\\sTERRITORY$", "")
]

#Clean elections
################

#Clean county name: strip leading/trailing whitespace
elections[, COUNTY_NAME := str_trim(COUNTY_NAME)]

#Select general elections (type 'G') for president, congress and governor
elections <- elections[type %in% "G" & office %in% c("PRES", "CONG", "GOV")]

#Keep rows that have a vote count and at least one of the two total-vote fields
elections <- elections[
  !is.na(vote) & (!is.na(total_vote) | !is.na(total_vote_alt))
]

#Party number to name key.
#Each line of the code book is trimmed and split on the two spaces that follow
#a run of four digits; every split line becomes a one-row table (columns V1,
#V2) and the rows are stacked.
party_key <- rbindlist(
  lapply(
    lapply(
      str_split(
        str_trim(
          scan("./raw/icpsrPartyCodes.txt", what = "character", sep = "\n")
        ),
        "(?<=\\d\\d\\d\\d)  "
      ),
      t
    ),
    data.table
  )
)
setnames(party_key, c("V1", "V2"), c("party_number", "party_name"))

#Normalise party names: every character outside A-Z becomes "_", then runs of
#underscores are squeezed to a single "_"
party_key[
  ,
  party_name := str_replace_all(
    str_replace_all(party_name, "[^A-Z]", "_"),
    "[_]++", "_"
  )
]

#Merge in party name: look up the party_key row for every elections row
setkey(party_key, party_number)
setkey(elections, party)
elections <- party_key[elections]

#Collapse votes to election-constituency-party level: sum `vote` over rows that
#agree on every other column
party_vars <- setdiff(names(elections), "vote")
elections <- elections[, .(votes = sum(vote)), by = party_vars]

#Restrict to elections held 1830-1920
elections <- elections[year %in% 1830:1920]

################################
#Interpolate to 1860 boundaries#
################################
#Merge to 1860 counties, 
#weighting by proportion of county in 1860 county boundary

#Unique (state, county name, county id, year) combinations found in the
#elections data; these are matched to crosswalk rows by state, date and name
elections_names <- unique(
  elections[, list(state_name = STNAME, name = COUNTY_NAME, COUNTY_ID, year)]
)
setkey(elections_names, state_name)
setkey(boundary_crosswalk, target_state)

election_years <- sort(unique(elections_names$year))
crosswalkList <- vector("list", length(election_years))

#Generate weights for each election year
for (i in seq_along(election_years)) {
  y <- election_years[i]
  print(y)
  #Boundaries are evaluated as of January 1 of the election year
  y_date <- as.Date(paste0(y, "-01-01"))

  #Pair every county reported in year y with all crosswalk rows of its state
  temp <- boundary_crosswalk[elections_names[year %in% y], allow.cartesian = TRUE]

  #Keep crosswalk rows whose source-county validity window covers y_date
  #(rows with a missing start or end date are dropped)
  temp <- temp[from_start_date <= y_date & from_end_date >= y_date]

  #String distance between the reported county name and the crosswalk source
  #name. stringdist() is vectorised over rows, so no grouping is needed.
  temp[, dist := stringdist(name, from_name, method = "jw")]

  #Keep a pairing only when it is the closest match in both directions:
  #best source name for the reported name, and best reported name for the
  #source name, within each target county
  temp[, best_data := dist == min(dist), by = list(target_name, name)]
  temp[, best_xwalk := dist == min(dist), by = list(target_name, from_name)]
  temp <- temp[
    (best_data) & (best_xwalk),
    list(from_state, name, COUNTY_ID, year, target_name, target_state,
         target_fips, pct_from_in_target, pct_target_in_from, target_km2)
  ]
  crosswalkList[[i]] <- temp
}

crosswalk_elections <- rbindlist(crosswalkList)

#Merge interpolation weights into the election returns, joining on
#(state, county name, county id, year). allow.cartesian lets one election row
#match several crosswalk rows.
setkey(crosswalk_elections, target_state, name, COUNTY_ID, year)
setkey(elections, STNAME, COUNTY_NAME, COUNTY_ID, year)
elections_1860 <- crosswalk_elections[elections, allow.cartesian = TRUE]

#Reweight votes for interpolation: within each 1860 county / contest / party /
#flag_count cell, sum the counts weighted by pct_from_in_target. Rows without
#a crosswalk match (missing target_name) are dropped.
elections_1860 <- elections_1860[
  !is.na(target_name),
  .(
    votes = sum(votes * pct_from_in_target, na.rm = TRUE),
    total_vote = sum(total_vote * pct_from_in_target, na.rm = TRUE),
    total_vote_alt = sum(total_vote_alt * pct_from_in_target, na.rm = TRUE),
    #one list cell per group holding the distinct congressional districts
    cong_dist = list(unique(CONG_DIST))
  ),
  by = .(target_state, target_name, target_fips, year, office, office_no,
         type, flag_count, party_number, party_name)
]

#Average the interpolated counts over flag_count so that each
#1860 county / contest / party combination is a single row.
#cong_dist is a list column: it must be collapsed into ONE list cell per group.
#Returning the group's list as-is (one element per input row) makes data.table
#recycle the scalar means and emit one duplicate row per flag_count value,
#which inflates the grouped vote sums computed further down.
elections_1860 <- elections_1860[
  !is.na(target_name),
  list(
    votes = mean(votes, na.rm = TRUE),
    total_vote = mean(total_vote, na.rm = TRUE),
    total_vote_alt = mean(total_vote_alt, na.rm = TRUE),
    #union of the districts recorded across the averaged rows
    cong_dist = list(unique(unlist(cong_dist)))
  ),
  by = list(target_state, target_name, target_fips, year, office, office_no,
            type, party_number, party_name)
]

#Calculate GOP vote share (using aliases from 8 states used in analyses).
#gop_vote is only filled for 1854 onward; earlier rows stay NA.
gop_aliases <- c("MODERN_REPUBLICAN", "UNION", "UNION_REPUBLICAN", "UNIONIST")
elections_1860[
  year >= 1854,
  gop_vote := sum(votes[party_name %in% gop_aliases], na.rm = TRUE),
  by = .(target_state, target_name, target_fips, year, office, office_no, type)
]
elections_1860[, `:=`(
  vs_gop = gop_vote / total_vote,
  vs_gop_alt = gop_vote / total_vote_alt
)]

#Anti-slavery vote share.
#This and the remaining measures are only filled for years before 1861.
anti_slavery_names <- c(
  "ANTI_NEBRASKA_DEMOCRAT", "ANTI_LECOMPTON_DEMOCRAT",
  "AMERICAN_AND_SCATTERING", "AMERICAN", "DEMOCRAT_AND_AMERICAN",
  "FILLMORE_AMERICAN", "FREMONT_AMERICAN",
  "FREE_SOIL", "FREE_SOIL_AND_SCATTERING", "FREE_SOIL_DEMOCRAT", "WHIG_FREE_SOIL",
  "LIBERTY"
)
elections_1860[
  year < 1861,
  anti_slavery_vote := sum(votes[party_name %in% anti_slavery_names], na.rm = TRUE),
  by = .(target_state, target_name, target_fips, year, office, office_no, type)
]
elections_1860[, vs_antislavery := anti_slavery_vote / total_vote]

#Free Soil vote share
freesoil <- c(
  "FREE_SOIL", "FREE_SOIL_AND_SCATTERING", "FREE_SOIL_DEMOCRAT", "WHIG_FREE_SOIL"
)
elections_1860[
  year < 1861,
  freesoil_vote := sum(votes[party_name %in% freesoil], na.rm = TRUE),
  by = .(target_state, target_name, target_fips, year, office, office_no, type)
]
elections_1860[, vs_freesoil := freesoil_vote / total_vote]

#Liberty vote share
liberty <- "LIBERTY"
elections_1860[
  year < 1861,
  liberty_vote := sum(votes[party_name %in% liberty], na.rm = TRUE),
  by = .(target_state, target_name, target_fips, year, office, office_no, type)
]
elections_1860[, vs_liberty := liberty_vote / total_vote]

#American vote share
american <- c(
  "AMERICAN_AND_SCATTERING", "AMERICAN", "DEMOCRAT_AND_AMERICAN",
  "FILLMORE_AMERICAN", "FREMONT_AMERICAN"
)
elections_1860[
  year < 1861,
  american_vote := sum(votes[party_name %in% american], na.rm = TRUE),
  by = .(target_state, target_name, target_fips, year, office, office_no, type)
]
elections_1860[, vs_american := american_vote / total_vote]

#Any voteshare: each party's share of total_vote_alt, tagged with a
#"vs_<party_number>" id that becomes its column name in the wide table
elections_1860[, `:=`(
  vs = votes / total_vote_alt,
  party_id = paste0("vs_", party_number)
)]

#One row per county-contest, one vs_<party_number> column per party.
#Parties absent from a contest get 0.
elections_1860_wide <- dcast.data.table(
  elections_1860,
  target_state + target_name + target_fips + year + office + office_no + type +
    vs_gop + vs_gop_alt + vs_antislavery + vs_freesoil + vs_liberty +
    vs_american + total_vote + total_vote_alt ~ party_id,
  value.var = "vs",
  fun.aggregate = mean,
  fill = 0
)

#Relocate congressional elections to cycle year (Only CT in this data):
#odd-year CONG rows are moved back one year
elections_1860_wide[office %in% "CONG" & year %% 2 != 0, year := year - 1L]

#Create pre-war county-level voteshares
#All columns whose name starts with "vs_"
vs_vars <- names(elections_1860_wide)[
  str_detect(names(elections_1860_wide), "^vs_")
]

#One row per 1860 county; one column per measure x year x office for
#elections up to and including 1860, with 0 for missing combinations
election_covars_1860_wide <- dcast.data.table(
  elections_1860_wide[year <= 1860],
  target_state + target_name + target_fips ~ year + office,
  value.var = c(vs_vars, "total_vote", "total_vote_alt"),
  fun.aggregate = mean,
  fill = 0,
  drop = TRUE
)

#Keep only columns for which "not all values equal 0" is TRUE; which() also
#discards columns where that test is NA (only zeros and missing values)
election_covars_1860_wide <- election_covars_1860_wide[
  ,
  which(unlist(lapply(
    election_covars_1860_wide,
    function(column) !all(column == 0)
  ))),
  with = FALSE
]

#Prefix the surviving vote-share columns with "pre_"
vs_vars <- names(election_covars_1860_wide)[
  str_detect(names(election_covars_1860_wide), "^vs_")
]
setnames(election_covars_1860_wide, vs_vars, paste0("pre_", vs_vars))

#Create county-level wartime and postwar outcome measures: one row per 1860
#county, one column per measure x year x office for elections after 1860
election_outcomes_1860 <- dcast.data.table(
  elections_1860_wide[year > 1860],
  target_state + target_name + target_fips ~ year + office,
  value.var = c("vs_gop", "vs_gop_alt", "vs_0100", "total_vote", "total_vote_alt"),
  fun.aggregate = mean,
  drop = TRUE
)

#Merge covars and outcomes together: every row of the covariates table is
#kept and the matching outcome columns are attached
setkey(election_outcomes_1860, target_state, target_name, target_fips)
setkey(election_covars_1860_wide, target_state, target_name, target_fips)

election_data_wide <- election_outcomes_1860[election_covars_1860_wide]

fwrite(election_data_wide, "./cleaned/elections_wide.csv", row.names = FALSE)

#Cleanup: remove everything created by this script, keeping the objects that
#existed beforehand (and `keep` itself)
rm(list = setdiff(ls(), c(keep, "keep")))
gc()